Poniższa analiza wykorzystuje bazę danych Rebrickable, skupiając się na obszarze motywów i zestawów. Analiza wizualizuje dane dotyczące liczby zestawów, liczby motywów oraz średniej liczby części w zestawie. Pokazuję również przyrost liczby części na przestrzeni lat z podziałem na motywy główne, tj. takie, do których przypisane są motywy potomne. W dalszej części analizy bliżej przyglądam się zestawowi ‘The Lord of the Rings’ i figurkom, jakie on zawiera. Ostatnia część analizy dotyczy podziału wszystkich elementów na ich kolory.
W analizie widać wzrost badanych statystyk na przestrzeni lat. Zarówno liczba zestawów, liczba motywów, motywów głównych, średnia liczba części w zestawie, jak i sumaryczna liczba części LEGO rośnie z biegiem lat, szczególnie gwałtownie w ostatnich kilku latach, kiedy to wszystkie te statystyki osiągają swój szczyt. Pochylając się nad figurkami z zestawu ‘The Lord of the Rings’, można zauważyć, że nieoczekiwanie żadna z głównych postaci nie jest najbardziej popularną figurką w tym zestawie. Przodują w tej statystyce postacie drugoplanowe ‘Uruk-Hai’ oraz ‘Mordor-Orc’. Wynika to z tego, że postacie te występują w kilku wersjach, a zestaw danych został zmodyfikowany do podstawowych nazw figurek. Statystyki kolorów wskazują na dużą dywersyfikację kolorów. Jedynie biały oraz czarny kolor występują w około 9% całego zbioru, reszta kolorów nie przekracza 6%.
# Load sets and themes, keeping only the leading columns used below.
# file.path() builds the paths portably; the previous "\\" separators only
# resolved on Windows.
sets <- read.csv(file.path("rebrickable", "sets.csv"), header = TRUE) %>%
  select(1:5)
themes <- read.csv(file.path("rebrickable", "themes.csv"), header = TRUE) %>%
  select(1:3)

# A theme with no parent is treated as its own parent, so the self-join below
# always finds a matching row.
themes$parent_id <- dplyr::coalesce(themes$parent_id, themes$id)

# Attach the parent theme's name to every theme.
themes_parent <- themes %>%
  left_join(themes, by = c("parent_id" = "id")) %>%
  select(id, name = name.x, parent_id, parent_name = name.y)

# One row per set, enriched with its theme and parent theme. Clashing column
# names get the "_sets" / "_theme" suffixes (e.g. name_sets, name_theme).
sets_themes <- merge(
  sets, themes_parent,
  by.x = "theme_id", by.y = "id",
  suffixes = c("_sets", "_theme")
)

# Per-year statistics. na.rm = TRUE belongs inside mean(); placed as a
# separate summarise() argument it only created a constant column `na.rm`.
sets_year_mean <- sets_themes %>%
  group_by(year) %>%
  summarise(
    total_parts = mean(num_parts, na.rm = TRUE),
    number_of_sets = n(),
    number_of_themes = n_distinct(theme_id),
    number_of_parent_themes = n_distinct(parent_id)
  )

# Line chart with all four yearly statistics on a shared axis.
theme_set_plot_basic <- ggplot(sets_year_mean, aes(x = year)) +
  geom_line(aes(y = total_parts, color = "Średnia liczba części w zestawie")) +
  geom_line(aes(y = number_of_sets, color = "Liczba zestawów")) +
  geom_line(aes(y = number_of_themes, color = "Liczba motywów")) +
  geom_line(aes(y = number_of_parent_themes, color = "Liczba motywów głównych")) +
  labs(title = "Zestawy i motywy", y = "", x = "Rok", color = "Statystyki") +
  scale_color_manual(
    values = c(
      "Średnia liczba części w zestawie" = "blue",
      "Liczba zestawów" = "green",
      "Liczba motywów" = "orange",
      "Liczba motywów głównych" = "red"
    )
  )
ggplotly(theme_set_plot_basic)

# Long format (year, param, value) for the faceted density plot.
sets_stats <- sets_year_mean %>%
  pivot_longer(cols = -year, names_to = "param", values_to = "value")

# Human-readable facet titles for the statistic names.
my_labeller <- as_labeller(
  c(
    total_parts = "Średnia liczba części w zestawie",
    number_of_sets = "Liczba zestawów",
    number_of_themes = "Liczba motywów",
    number_of_parent_themes = "Liczba motywów głównych"
  )
)

# Distribution of each yearly statistic, one facet per statistic.
sets_density_plot <- ggplot(sets_stats, aes(x = value)) +
  geom_density() +
  facet_wrap(~param, ncol = 1, scales = "free", labeller = my_labeller)
ggplotly(sets_density_plot)
# Pearson correlation between the yearly mean part count and the yearly
# number of sets.
cor_test <- with(
  sets_year_mean,
  cor.test(total_parts, number_of_sets, method = "pearson")
)

# Confidence bounds rounded to three decimals for display.
ci_bounds <- round(cor_test$conf.int, 3)

# One-row summary of the test for the report table.
cor_test_results <- data.frame(
  Estimate = cor_test$estimate,
  P.Value = cor_test$p.value,
  Confidence.Interval = paste0("[", ci_bounds[1], ", ", ci_bounds[2], "]"),
  Method = "Pearson's Correlation"
)

# Scatter plot of the two statistics with a fitted linear trend.
cor_plot <- sets_year_mean %>%
  ggplot(aes(x = total_parts, y = number_of_sets)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    x = "Średnia liczba części w zestawie",
    y = "Liczba zestawów"
  )
ggplotly(cor_plot)

# Styled table with the test results.
cor_table <- kable(
  cor_test_results,
  caption = "Wyniki Testu Korelacji Pearsona",
  align = 'c'
)
cor_table %>%
  kable_styling(font_size = 12, latex_options = c("striped", "scale_down")) %>%
  column_spec(1, width = "3cm") %>%
  column_spec(2, width = "3cm") %>%
  column_spec(3, width = "4cm") %>%
  column_spec(4, width = "3cm")
| Estimate | P.Value | Confidence.Interval | Method | |
|---|---|---|---|---|
| cor | 0.7213798 | 0 | [0.59, 0.815] | Pearson’s Correlation |
Poniższy wykres wizualizuje liczbę części w zestawach w kolejnych latach, z podziałem na motywy główne – motywy, które mogą być motywem nadrzędnym (rodzicem) innego motywu.
# Total number of parts per parent theme and year. `.groups = "drop"` returns
# an ungrouped result instead of leaving it grouped by parent_name.
sets_themes_sum <- sets_themes %>%
  group_by(parent_name, year) %>%
  summarise(total_parts = sum(num_parts, na.rm = TRUE), .groups = "drop")

# Stacked area chart of parts per year, filled by parent theme.
# A single y scale is used: the earlier scale_y_log10() was replaced by the
# scale_y_continuous() added right after it, so it never took effect and only
# produced a "scale already present" message.
theme_sum_plot <- ggplot(
  data = sets_themes_sum,
  aes(x = year, y = total_parts, fill = parent_name)
) +
  geom_area() +
  labs(y = "Liczba części", x = "Rok", fill = "Motywy") +
  scale_y_continuous(labels = label_comma())
ggplotly(theme_sum_plot)
# Summary statistics of num_parts with one column per parent theme: spread the
# part counts into per-theme columns, drop the id/year columns, summarise.
st(
  sets_themes %>%
    spread(parent_name, num_parts) %>%
    select(-c(theme_id, parent_id, year))
)
| Variable | N | Mean | Std. Dev. | Min | Pctl. 25 | Pctl. 75 | Max |
|---|---|---|---|---|---|---|---|
| 4 Juniors | 54 | 43 | 43 | 0 | 12 | 66 | 175 |
| Advent | 60 | 265 | 63 | 116 | 232 | 310 | 500 |
| Adventurers | 83 | 102 | 155 | 0 | 20 | 120 | 781 |
| Agents | 28 | 428 | 316 | 0 | 224 | 538 | 1204 |
| Alpha Team | 31 | 139 | 140 | 13 | 30 | 190 | 480 |
| Angry Birds | 6 | 398 | 294 | 74 | 196 | 562 | 859 |
| Animal Crossing | 5 | 0 | 0 | 0 | 0 | 0 | 0 |
| Aquazone | 37 | 178 | 193 | 0 | 28 | 259 | 854 |
| Architecture | 60 | 656 | 539 | 57 | 312 | 776 | 2276 |
| Atlantis | 24 | 222 | 244 | 0 | 52 | 288 | 1008 |
| Avatar | 9 | 614 | 314 | 179 | 528 | 761 | 1217 |
| Avatar: The Last Airbender | 2 | 562 | 228 | 401 | 482 | 643 | 724 |
| Batman | 50 | 618 | 949 | 0 | 52 | 628 | 3989 |
| Belville | 88 | 87 | 82 | 0 | 31 | 113 | 457 |
| Ben 10 | 6 | 20 | 3.2 | 15 | 18 | 22 | 23 |
| Bionicle | 460 | 71 | 124 | 0 | 2 | 58 | 876 |
| Boat | 21 | 136 | 88 | 46 | 81 | 194 | 365 |
| Books | 981 | 8.2 | 22 | 0 | 0 | 7 | 251 |
| Brick Sketches | 8 | 160 | 36 | 115 | 134 | 178 | 214 |
| Brickheadz | 152 | 197 | 117 | 0 | 119 | 246 | 708 |
| BrickLink Designer Program | 28 | 1675 | 1057 | 372 | 913 | 2096 | 4074 |
| Building Set with People | 30 | 127 | 120 | 39 | 52 | 120 | 526 |
| Bulk Bricks | 136 | 52 | 30 | 0 | 25 | 70 | 130 |
| Cars | 28 | 199 | 219 | 0 | 51 | 282 | 852 |
| Castle | 267 | 164 | 279 | 0 | 24 | 194 | 2455 |
| Chinese Traditional Festivals | 25 | 556 | 600 | 0 | 87 | 882 | 1798 |
| Christmas | 32 | 290 | 431 | 0 | 42 | 222 | 1477 |
| City | 851 | 209 | 266 | 0 | 26 | 302 | 2010 |
| Classic | 67 | 512 | 531 | 0 | 63 | 886 | 1800 |
| Clikits | 83 | 54 | 56 | 0 | 13 | 72 | 285 |
| Collectible Minifigures | 858 | 7 | 6.7 | 0 | 6 | 8 | 121 |
| Creator | 477 | 411 | 680 | 0 | 56 | 500 | 5923 |
| DC Super Hero Girls | 12 | 309 | 307 | 5 | 144 | 371 | 1079 |
| Designer Sets | 47 | 201 | 247 | 0 | 28 | 286 | 784 |
| Dimensions | 68 | 90 | 70 | 6 | 48 | 98 | 268 |
| Dino | 7 | 319 | 243 | 80 | 179 | 372 | 793 |
| Dino 2010 | 6 | 249 | 272 | 4 | 50 | 345 | 721 |
| Dino Attack | 6 | 213 | 224 | 4 | 53 | 274 | 608 |
| Dinosaurs | 17 | 25 | 8.9 | 0 | 20 | 31 | 37 |
| Discovery | 8 | 365 | 342 | 0 | 123 | 558 | 865 |
| Disney | 76 | 468 | 828 | 0 | 97 | 512 | 4837 |
| Disney’s Mickey Mouse | 5 | 89 | 39 | 27 | 88 | 110 | 130 |
| Disney Princess | 87 | 177 | 184 | 0 | 44 | 264 | 1016 |
| DOTS | 92 | 239 | 282 | 0 | 34 | 402 | 1165 |
| Dreamzzz | 19 | 372 | 420 | 0 | 42 | 488 | 1389 |
| Duplo | 1293 | 35 | 38 | 0 | 9 | 49 | 386 |
| Educational and Dacta | 678 | 188 | 418 | 0 | 7 | 154 | 5200 |
| Elves | 42 | 332 | 279 | 0 | 84 | 503 | 1016 |
| Exo-Force | 52 | 195 | 276 | 0 | 20 | 250 | 1458 |
| Fabuland | 105 | 35 | 38 | 2 | 7 | 47 | 159 |
| Factory | 7 | 1223 | 726 | 606 | 894 | 1210 | 2792 |
| FIRST LEGO League | 95 | 489 | 737 | 0 | 0 | 864 | 2175 |
| Forma | 4 | 98 | 130 | 31 | 32 | 98 | 293 |
| Freestyle | 64 | 249 | 247 | 0 | 39 | 407 | 1010 |
| Friends | 549 | 215 | 290 | 0 | 32 | 305 | 2010 |
| Fusion | 4 | 238 | 24 | 212 | 220 | 258 | 262 |
| Gabby’s Dollhouse | 4 | 194 | 205 | 58 | 81 | 222 | 498 |
| Galidor | 21 | 10 | 7.8 | 0 | 4 | 13 | 29 |
| Games | 47 | 175 | 113 | 0 | 97 | 252 | 425 |
| Gear | 3265 | 2.7 | 9.7 | 0 | 0 | 0 | 141 |
| Ghostbusters | 2 | 2599 | 2889 | 556 | 1578 | 3620 | 4642 |
| Harry Potter | 153 | 537 | 944 | 0 | 64 | 599 | 6020 |
| Hero Factory | 111 | 60 | 68 | 0 | 19 | 64 | 394 |
| Hidden Side | 32 | 307 | 343 | 6 | 9 | 440 | 1475 |
| Hobby Sets | 9 | 399 | 341 | 134 | 197 | 434 | 982 |
| Homemaker | 32 | 151 | 103 | 23 | 75 | 188 | 474 |
| Icons | 42 | 2426 | 2439 | 0 | 948 | 2526 | 10001 |
| Indiana Jones | 23 | 377 | 351 | 0 | 104 | 538 | 1545 |
| Inventor | 4 | 290 | 49 | 243 | 253 | 325 | 344 |
| Island Xtreme Stunts | 13 | 81 | 98 | 4 | 13 | 93 | 336 |
| Juniors | 68 | 129 | 80 | 30 | 75 | 150 | 480 |
| Jurassic World | 89 | 232 | 403 | 0 | 30 | 289 | 3120 |
| Legends of Chima | 144 | 229 | 851 | 0 | 24 | 174 | 9987 |
| LEGO Art | 15 | 3519 | 2456 | 805 | 2378 | 3792 | 11695 |
| LEGO Brand Store | 392 | 55 | 66 | 0 | 16 | 58 | 536 |
| LEGO Exclusive | 59 | 467 | 421 | 0 | 69 | 852 | 1312 |
| LEGO Ideas and CUUSOO | 67 | 1070 | 921 | 10 | 326 | 1788 | 3955 |
| LEGO Originals | 21 | 1.5 | 6.3 | 0 | 0 | 0 | 29 |
| Legoland | 172 | 102 | 98 | 0 | 41 | 134 | 596 |
| Legoland Parks | 55 | 166 | 205 | 3 | 68 | 180 | 1336 |
| Make & Create | 118 | 448 | 406 | 0 | 138 | 651 | 2000 |
| Master Builder Academy | 21 | 166 | 162 | 0 | 0 | 222 | 675 |
| Mindstorms | 143 | 116 | 262 | 0 | 1 | 36 | 1712 |
| Minecraft | 117 | 354 | 375 | 0 | 72 | 511 | 2863 |
| Minions | 9 | 184 | 275 | 0 | 54 | 136 | 876 |
| Minitalia | 19 | 94 | 75 | 0 | 43 | 120 | 307 |
| Mixels | 121 | 41 | 30 | 0 | 0 | 64 | 74 |
| Model Team | 16 | 683 | 377 | 285 | 422 | 786 | 1748 |
| Modular Buildings | 19 | 2420 | 602 | 1250 | 2190 | 2670 | 4002 |
| Modulex | 34 | 69 | 50 | 10 | 10 | 100 | 200 |
| Monkie Kid | 47 | 851 | 666 | 1 | 304 | 1370 | 2438 |
| Monster Fighters | 14 | 392 | 563 | 0 | 40 | 443 | 2064 |
| Nexo Knights | 111 | 169 | 263 | 3 | 20 | 216 | 1426 |
| Ninja | 31 | 95 | 152 | 0 | 23 | 110 | 698 |
| Ninjago | 533 | 261 | 548 | 0 | 14 | 313 | 6163 |
| Other | 207 | 105 | 273 | 0 | 11 | 98 | 2547 |
| Overwatch | 13 | 263 | 236 | 0 | 39 | 419 | 730 |
| Pharaoh’s Quest | 9 | 207 | 273 | 29 | 31 | 213 | 795 |
| Pirates | 91 | 201 | 282 | 0 | 28 | 260 | 1668 |
| Pirates of the Caribbean | 17 | 386 | 594 | 0 | 21 | 468 | 2294 |
| Power Functions | 33 | 1.8 | 2.3 | 0 | 1 | 1 | 10 |
| Power Miners | 23 | 189 | 203 | 0 | 21 | 260 | 707 |
| Primo | 136 | 8.7 | 10 | 1 | 2 | 11 | 78 |
| Prince of Persia | 6 | 310 | 301 | 52 | 85 | 444 | 821 |
| Promotional | 246 | 108 | 107 | 0 | 29 | 164 | 583 |
| Quatro | 11 | 48 | 35 | 1 | 19 | 74 | 101 |
| Racers | 284 | 153 | 248 | 0 | 25 | 143 | 1484 |
| Rock Raiders | 17 | 83 | 120 | 0 | 22 | 91 | 411 |
| Scala | 73 | 48 | 57 | 2 | 11 | 59 | 312 |
| Scooby-Doo | 10 | 191 | 277 | 0 | 18 | 258 | 861 |
| Sculptures | 10 | 2099 | 1198 | 286 | 1614 | 2690 | 4502 |
| Seasonal | 303 | 130 | 214 | 0 | 32 | 150 | 1517 |
| Service Packs | 781 | 27 | 37 | 0 | 4 | 35 | 453 |
| Sonic The Hedgehog | 5 | 495 | 209 | 292 | 376 | 615 | 802 |
| Space | 334 | 147 | 180 | 0 | 25 | 211 | 1012 |
| Speed Champions | 65 | 323 | 224 | 0 | 176 | 485 | 963 |
| Speed Racer | 4 | 362 | 170 | 237 | 241 | 425 | 600 |
| SpongeBob SquarePants | 14 | 313 | 134 | 95 | 216 | 417 | 579 |
| Sports | 173 | 63 | 110 | 0 | 4 | 71 | 570 |
| Spybotics | 4 | 236 | 25 | 202 | 228 | 248 | 261 |
| Star Wars | 935 | 369 | 699 | 0 | 39 | 441 | 7541 |
| Stranger Things | 3 | 780 | 1305 | 4 | 27 | 1168 | 2287 |
| Studios | 55 | 53 | 102 | 0 | 4 | 28 | 503 |
| Super Heroes DC | 176 | 209 | 308 | 0 | 15 | 306 | 2526 |
| Super Heroes Marvel | 293 | 311 | 522 | 0 | 44 | 376 | 4049 |
| Super Mario | 147 | 208 | 431 | 0 | 16 | 212 | 2807 |
| System | 464 | 47 | 101 | 0 | 4 | 47 | 917 |
| Technic | 558 | 474 | 646 | 0 | 79 | 601 | 4108 |
| Teenage Mutant Ninja Turtles | 23 | 311 | 285 | 0 | 51 | 554 | 888 |
| The Hobbit and Lord of the Rings | 40 | 353 | 458 | 0 | 31 | 470 | 2362 |
| The LEGO Movie | 74 | 390 | 599 | 4 | 42 | 471 | 3178 |
| The Lone Ranger | 8 | 311 | 290 | 20 | 58 | 602 | 703 |
| The Powerpuff Girls | 2 | 184 | 57 | 144 | 164 | 204 | 224 |
| Time Cruisers | 9 | 194 | 142 | 58 | 82 | 239 | 512 |
| Town | 765 | 113 | 159 | 0 | 24 | 150 | 2013 |
| Toy Story | 21 | 187 | 159 | 18 | 90 | 230 | 586 |
| Train | 244 | 148 | 190 | 0 | 7.8 | 206 | 918 |
| Trolls: World Tour | 9 | 274 | 154 | 61 | 170 | 397 | 504 |
| Unikitty! | 26 | 88 | 142 | 0 | 15 | 96 | 515 |
| Universal Building Set | 412 | 167 | 228 | 0 | 15 | 243 | 1201 |
| Universe | 2 | 30 | 36 | 5 | 18 | 43 | 56 |
| Value Packs | 20 | 0.05 | 0.22 | 0 | 0 | 0 | 1 |
| VIDIYO | 50 | 71 | 181 | 0 | 2 | 46 | 982 |
| Vikings | 8 | 382 | 318 | 76 | 182 | 524 | 1036 |
| Western | 22 | 173 | 207 | 0 | 20 | 255 | 688 |
| X-Pod | 24 | 42 | 18 | 0 | 37 | 53 | 64 |
| Xtra | 19 | 25 | 12 | 4 | 14 | 34 | 46 |
| Znap | 19 | 90 | 90 | 22 | 26 | 134 | 268 |
# Load minifigure metadata and the inventory tables linking figures to sets.
# file.path() keeps the paths portable (the "\\" separators were Windows-only).
minifigs <- read.csv(file.path("rebrickable", "minifigs.csv"), header = TRUE) %>%
  select(1:3)
inv_minifigs <- read.csv(
  file.path("rebrickable", "inventory_minifigs.csv"),
  header = TRUE
)
inv <- read.csv(file.path("rebrickable", "inventories.csv"), header = TRUE)

# Sets whose theme is "The Lord of the Rings".
lotr <- sets_themes %>% filter(name_theme == "The Lord of the Rings")

# minifig -> inventory entry -> inventory -> set, restricted to the LOTR sets.
df_minifig_inv <- merge(minifigs, inv_minifigs, by = "fig_num")
df_minifig_inv <- merge(df_minifig_inv, inv, by.x = "inventory_id", by.y = "id")
df_minifig_lotr <- merge(
  df_minifig_inv, lotr,
  by = "set_num",
  suffixes = c("_minifig", "_set")
)

# Normalise figure names so variants of one character are counted together:
# anything starting at "Gandalf" becomes "Gandalf", everything from the first
# " -" onwards is dropped, and the space following "Uruk-Hai" is removed.
df_minifig_lotr$name <- gsub("Gandalf.*", "Gandalf", df_minifig_lotr$name)
df_minifig_lotr$name <- gsub(" -.*", "", df_minifig_lotr$name)
df_minifig_lotr$name <- gsub("Uruk-Hai ", "Uruk-Hai", df_minifig_lotr$name)

# Ten most frequent (normalised) figure names.
df_minifigs_lotr_gruped <- df_minifig_lotr %>%
  group_by(name) %>%
  summarise(minifg_count = n()) %>%
  arrange(desc(minifg_count)) %>%
  slice_head(n = 10)

# Bar chart of the counts; x labels are tilted to keep long names readable.
minifigs_plot <- ggplot(df_minifigs_lotr_gruped, aes(x = name, y = minifg_count)) +
  geom_bar(stat = "identity", alpha = 1, width = 0.6) +
  labs(
    title = "10 najbardziej popularnych figurek w zestawie 'The Lord of the Rings'",
    x = "Postać",
    y = "Liczba zestawów"
  ) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))
ggplotly(minifigs_plot)
# Load colour definitions and the element list.
# file.path() keeps the paths portable (the "\\" separators were Windows-only).
colorss <- read.csv(file.path("rebrickable", "colors.csv"), header = TRUE)
elements <- read.csv(file.path("rebrickable", "elements.csv"), header = TRUE)

# Number of elements per colour and its share of all elements, in percent.
elements_grouped <- elements %>%
  group_by(color_id) %>%
  summarise(color_count = n()) %>%
  mutate(color_percentage = (color_count / sum(color_count)) * 100)

# Attach colour names / RGB codes and turn the codes into "#RRGGBB" strings.
color_elements <- merge(colorss, elements_grouped, by.x = "id", by.y = "color_id")
color_elements$rgb <- paste0("#", color_elements$rgb)

# Keep the 15 most frequent real colours, then order them by name.
color_elements <- color_elements %>%
  filter(name != "[No Color/Any Color]", name != "[Unknown]") %>%
  arrange(desc(color_count)) %>%
  slice_head(n = 15) %>%
  arrange(name)

# Map every colour name to its own RGB value explicitly. An unnamed vector is
# matched to the fill levels by position, which silently mis-colours bars
# whenever the row order differs from the level order.
fill_values <- setNames(color_elements$rgb, color_elements$name)

# Horizontal bar chart: share of elements per colour, each bar filled with the
# colour it represents.
color_element_plot <- ggplot(
  color_elements,
  aes(x = name, y = color_percentage, fill = name)
) +
  geom_bar(stat = "identity", alpha = 1, width = 0.4) +
  scale_fill_manual(values = fill_values) +
  coord_flip() +
  labs(
    title = "15 najbardziej popularnych kolorów wśród pojedynczych elementów",
    x = "Kolor",
    y = "Procent w zestawie elementów",
    fill = "Color"
  ) +
  theme_bw() +
  theme(
    panel.background = element_rect(fill = "#EDEDED"),
    panel.grid.major = element_line(color = "black"),
    panel.grid.minor = element_line(color = "black"),
    panel.spacing = unit(0.1, "lines"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
ggplotly(color_element_plot)